Google form analysis visualizations


In [ ]:
%run "../Functions/2. Google form analysis.ipynb"

'Google form analysis' functions checks


In [ ]:

'Google form analysis' functions tinkering


In [ ]:
# Basic statistics for the respondents selected by getSurveysOfBiologists.
# The second argument is the title of the report, so it has to describe the
# sample actually passed in (it previously read 'non biologists').
binarizedAnswers = plotBasicStats(getSurveysOfBiologists(gform), 'biologists', includeUndefined = True)

In [ ]:
# Number of player ids recorded for each temporality value.
gform[[localplayerguidkey, QTemporality]].groupby(QTemporality).count()

In [ ]:
# Which statistics the report loop below has to compute.
includeAll = False
includeBefore = True
includeAfter = True
includeUndefined = False
includeProgress = True
includeRelativeProgress = False

# Demographic subsets: (answers, title, suffix used in the 'answered both' title).
_subsets = [
    (gform[gform[QLanguage] == enLanguageID], 'English', 'en'),
    (gform[gform[QLanguage] == frLanguageID], 'French', 'fr'),
    (gform[gform[QGender] == 'Female'], 'female', 'female'),
    (gform[gform[QGender] == 'Male'], 'male', 'male'),
]

# Every entry is [answers, title]: first the complete set and its subsets,
# then the same selections passed through getSurveysOfUsersWhoAnsweredBoth.
samples = [[gform.copy(), 'complete set']]
samples += [[subset, label] for subset, label, _suffix in _subsets]
samples.append([getSurveysOfUsersWhoAnsweredBoth(gform), 'answered both'])
samples += [
    [getSurveysOfUsersWhoAnsweredBoth(subset), 'answered both, ' + suffix]
    for subset, _label, suffix in _subsets
]

# Outer progress bar: one step per sample.
_progress = FloatProgress(min=0, max=len(samples))
display(_progress)

for sample, title in samples:

    ## Draft of plotBasicStats: for one sample, prints a table of basic
    ## statistics on the scores (count, unique, median, mean, std), runs a
    ## t test between the 'before' and 'after' scores, and draws a heatmap
    ## of the percentage of correct answers per question.
    ## sample can be: all, those who answered both before and after,
    ## those who played between date1 and date2, ...
    #def plotBasicStats(sample, title, includeAll, includeBefore, includeAfter, includeUndefined, includeProgress, includeRelativeProgress):

    # The 'before' and 'after' subsets are needed for their own columns, but
    # also as inputs of the progress and relative progress columns.
    needBefore = includeBefore or includeProgress or includeRelativeProgress
    needAfter = includeAfter or includeProgress or includeRelativeProgress

    # Expected number of progress steps: 2 per computed category (counted
    # with needBefore/needAfter so that a progress-only configuration matches
    # the increments actually performed below), plus 3 final steps (result
    # table, percentage tables, heatmap).
    stepsPerInclude = 2
    includeCount = np.sum([includeAll, needBefore, needAfter, includeUndefined, includeProgress])
    stepsCount = stepsPerInclude*includeCount + 3

    #print("stepsPerInclude=" + str(stepsPerInclude))
    #print("includeCount=" + str(includeCount))
    #print("stepsCount=" + str(stepsCount))

    # Inner progress bar: tracks the steps of the current sample only.
    __progress = FloatProgress(min=0, max=stepsCount)
    display(__progress)

    # Split the answers according to their temporality.
    sampleBefore = sample[sample[QTemporality] == answerTemporalities[0]]
    sampleAfter = sample[sample[QTemporality] == answerTemporalities[1]]
    sampleUndefined = sample[sample[QTemporality] == answerTemporalities[2]]

    #uniqueBefore = sampleBefore[localplayerguidkey]
    #uniqueAfter =
    #uniqueUndefined =

    scientificQuestionsSource = correctAnswers.copy()
    allQuestions = correctAnswers + demographicAnswers

    categories = ['all', answerTemporalities[0], answerTemporalities[1], answerTemporalities[2], 'progress', 'rel. progress']
    data = {}

    # Every intermediate result starts as an empty DataFrame so that the
    # names exist whatever the include flags are.
    sciBinarized = pd.DataFrame()
    allBinarized = pd.DataFrame()
    scoresAll = pd.DataFrame()

    sciBinarizedBefore = pd.DataFrame()
    allBinarizedBefore = pd.DataFrame()
    scoresBefore = pd.DataFrame()

    sciBinarizedAfter = pd.DataFrame()
    allBinarizedAfter = pd.DataFrame()
    scoresAfter = pd.DataFrame()

    sciBinarizedUndefined = pd.DataFrame()
    allBinarizedUndefined = pd.DataFrame()
    scoresUndefined = pd.DataFrame()

    scoresProgress = pd.DataFrame()

    ## basic stats:
    ### mean score
    ### median score
    ### std
    # A score is the row sum of the binarized scientific answers.
    if includeAll:
        sciBinarized = getAllBinarized( _source = scientificQuestionsSource, _form = sample)
        __progress.value += 1
        allBinarized = getAllBinarized( _source = allQuestions, _form = sample)
        __progress.value += 1
        scoresAll = pd.Series(np.dot(sciBinarized, np.ones(sciBinarized.shape[1])))

        data[categories[0]] = createStatSet(scoresAll, sample[localplayerguidkey])

    if needBefore:
        sciBinarizedBefore = getAllBinarized( _source = scientificQuestionsSource, _form = sampleBefore)
        __progress.value += 1
        allBinarizedBefore = getAllBinarized( _source = allQuestions, _form = sampleBefore)
        __progress.value += 1
        scoresBefore = pd.Series(np.dot(sciBinarizedBefore, np.ones(sciBinarizedBefore.shape[1])))
        temporaryStatSetBefore = createStatSet(scoresBefore, sampleBefore[localplayerguidkey])
    if includeBefore:
        data[categories[1]] = temporaryStatSetBefore

    if needAfter:
        sciBinarizedAfter = getAllBinarized( _source = scientificQuestionsSource, _form = sampleAfter)
        __progress.value += 1
        allBinarizedAfter = getAllBinarized( _source = allQuestions, _form = sampleAfter)
        __progress.value += 1
        scoresAfter = pd.Series(np.dot(sciBinarizedAfter, np.ones(sciBinarizedAfter.shape[1])))
        temporaryStatSetAfter = createStatSet(scoresAfter, sampleAfter[localplayerguidkey])
    if includeAfter:
        data[categories[2]] = temporaryStatSetAfter

    if includeUndefined:
        sciBinarizedUndefined = getAllBinarized( _source = scientificQuestionsSource, _form = sampleUndefined)
        __progress.value += 1
        allBinarizedUndefined = getAllBinarized( _source = allQuestions, _form = sampleUndefined)
        __progress.value += 1
        scoresUndefined = pd.Series(np.dot(sciBinarizedUndefined, np.ones(sciBinarizedUndefined.shape[1])))

        data[categories[3]] = createStatSet(scoresUndefined, sampleUndefined[localplayerguidkey])

    if includeProgress:
        # Progress is the 'after' statistic minus the 'before' statistic;
        # counts are limited by the smaller of the two groups.
        data[categories[4]] = {
            'count' : min(temporaryStatSetAfter['count'], temporaryStatSetBefore['count']),
            'unique' : min(temporaryStatSetAfter['unique'], temporaryStatSetBefore['unique']),
            'median' : temporaryStatSetAfter['median']-temporaryStatSetBefore['median'],
            'mean' : temporaryStatSetAfter['mean']-temporaryStatSetBefore['mean'],
            'std' : temporaryStatSetAfter['std']-temporaryStatSetBefore['std'],
        }
        __progress.value += 2


    result = pd.DataFrame(data)
    __progress.value += 1

    print(title)
    print(result)
    if (includeBefore and includeAfter) or includeProgress:
        # The t test is only run when both groups hold more than 2 scores.
        if (len(scoresBefore) > 2 and len(scoresAfter) > 2):
            ttest = ttest_ind(scoresBefore, scoresAfter)
            print("t test: statistic=" + repr(ttest.statistic) + " pvalue=" + repr(ttest.pvalue))
    print()

    ## percentage correct
    ### percentage correct - max 5 columns
    percentagePerQuestionAll = pd.DataFrame()
    percentagePerQuestionBefore = pd.DataFrame()
    percentagePerQuestionAfter = pd.DataFrame()
    percentagePerQuestionUndefined = pd.DataFrame()
    percentagePerQuestionProgress = pd.DataFrame()

    # List of [percentage table, category name] candidates for the heatmap.
    tables = []

    if includeAll:
        percentagePerQuestionAll = getPercentagePerQuestion(allBinarized)
        tables.append([percentagePerQuestionAll, categories[0]])

    if needBefore:
        percentagePerQuestionBefore = getPercentagePerQuestion(allBinarizedBefore)
    if includeBefore:
        tables.append([percentagePerQuestionBefore, categories[1]])

    if needAfter:
        percentagePerQuestionAfter = getPercentagePerQuestion(allBinarizedAfter)
    if includeAfter:
        tables.append([percentagePerQuestionAfter, categories[2]])

    if includeUndefined:
        percentagePerQuestionUndefined = getPercentagePerQuestion(allBinarizedUndefined)
        tables.append([percentagePerQuestionUndefined, categories[3]])

    if includeProgress or includeRelativeProgress:
        percentagePerQuestionProgress = percentagePerQuestionAfter - percentagePerQuestionBefore

        if includeProgress:
            tables.append([percentagePerQuestionProgress, categories[4]])

        if includeRelativeProgress:
            # use temporaryStatSetAfter['count'], temporaryStatSetBefore['count']?
            # Relative progress = progress / 'before' percentage, forced to 0
            # when the 'before' percentage is 0 to avoid dividing by zero.
            percentagePerQuestionProgress2 = percentagePerQuestionProgress.copy()
            for index in range(0,len(percentagePerQuestionProgress.index)):
                if (0 == percentagePerQuestionBefore.iloc[index,0]):
                    percentagePerQuestionProgress2.iloc[index,0] = 0
                else:
                    percentagePerQuestionProgress2.iloc[index,0] = \
                    percentagePerQuestionProgress.iloc[index,0]/percentagePerQuestionBefore.iloc[index,0]
            tables.append([percentagePerQuestionProgress2, categories[5]])

    __progress.value += 1

    graphTitle = '% correct: '
    toConcat = []

    # Only non-empty tables whose first column holds no NaN are plotted.
    for table,category in tables:
        concat = (len(table.values) > 0)
        for elt in table.iloc[:,0].values:
            if np.isnan(elt):
                concat = False
                break
        if(concat):
            graphTitle = graphTitle + category + ' '
            toConcat.append(table)

    if (len(toConcat) > 0):
        percentagePerQuestionConcatenated = pd.concat(
            toConcat
            , axis=1)

        if(len(title) > 0):
            graphTitle = graphTitle + ' - ' + title

        _fig = plt.figure(figsize=(20,20))
        _ax1 = plt.subplot(111)
        _ax1.set_title(graphTitle)
        sns.heatmap(percentagePerQuestionConcatenated.round().astype(int),ax=_ax1,cmap=plt.cm.jet,square=True,annot=True,fmt='d')
    __progress.value += 1

    ### percentage cross correct
    ### percentage cross correct, conditionally

    # Sanity check: the inner progress bar must have reached its maximum.
    if(__progress.value != stepsCount):
        print("__progress.value=" + str(__progress.value) + " != stepsCount=" + str(stepsCount))

    _progress.value += 1

# Sanity check on the outer progress bar (one step per sample). The message
# reports _progress, the bar being tested, and not the per-sample __progress.
if(_progress.value != len(samples)):
    print("_progress.value=" + str(_progress.value) + " != len(samples)=" + str(len(samples)))

#    sciBinarized, sciBinarizedBefore, sciBinarizedAfter, sciBinarizedUndefined, \
#            allBinarized, allBinarizedBefore, allBinarizedAfter, allBinarizedUndefined

In [ ]:
# t test between the 'before' and 'after' scores, shown together with the
# type and size of both inputs.
ttest = ttest_ind(scoresBefore, scoresAfter)
(
    type(scoresBefore), len(scoresBefore),
    type(scoresAfter), len(scoresAfter),
    ttest,
)

In [ ]:
# Inspect the type of `tables` (presumably the value left by the last
# iteration of the statistics loop, where it is built as a list).
type(tables)

In [ ]:
# Scores of the current sample: row sums of the binarized scientific answers.
sciBinarized = getAllBinarized( _source = scientificQuestionsSource, _form = sample)
questionWeights = np.ones(sciBinarized.shape[1])
series = pd.Series(np.dot(sciBinarized, questionWeights))
#ids = pd.Series()
ids = sample[localplayerguidkey]

#def createStatSet(series, ids):
# Without any id, the index of the scores is used to count the answers.
if not len(ids):
    ids = series.index
result = dict(
    count = len(ids),
    unique = len(ids.unique()),
    median = series.median(),
    mean = series.mean(),
    std = series.std(),
)
result

In [ ]:
## percentage correct
### percentage correct - 3 columns
### percentage cross correct
### percentage cross correct, conditionally

In [ ]:
# Other inputs that can be tried here:
#_binarized = allBinarized
#_binarized = allBinarizedUndefined
_binarized = allBinarizedBefore

#def getPercentagePerQuestion(_binarized):
# Column sums give the total per question; dividing by the number of rows
# turns them into percentages.
answerCount = _binarized.shape[0]
columnTotals = np.dot(np.ones(answerCount), _binarized)
totalPerQuestionDF = pd.DataFrame(data=columnTotals, index=_binarized.columns)
percentagePerQuestion = totalPerQuestionDF*100 / answerCount
percentagePerQuestion

In [ ]:
# Totals per question (column sums of the binarized answers).
totalPerQuestionSci = np.dot(np.ones(sciBinarized.shape[0]), sciBinarized)
totalPerQuestionAll = np.dot(np.ones(allBinarized.shape[0]), allBinarized)

# One percentage table per category...
percentagePerQuestionAll = getPercentagePerQuestion(allBinarized)
percentagePerQuestionBefore = getPercentagePerQuestion(allBinarizedBefore)
percentagePerQuestionAfter = getPercentagePerQuestion(allBinarizedAfter)
percentagePerQuestionUndefined = getPercentagePerQuestion(allBinarizedUndefined)

# ...placed side by side, one column per category.
percentageTables = [
    percentagePerQuestionAll,
    percentagePerQuestionBefore,
    percentagePerQuestionAfter,
    percentagePerQuestionUndefined,
]
percentagePerQuestionConcatenated = pd.concat(percentageTables, axis=1)

# Single heatmap annotated with the rounded percentages.
heatmapValues = percentagePerQuestionConcatenated.round().astype(int)
_fig = plt.figure(figsize=(20,20))
_ax1 = plt.subplot(111)
_ax1.set_title('percentage correct per question: all, before, after, undefined')
sns.heatmap(heatmapValues, ax=_ax1, cmap=plt.cm.jet, square=True, annot=True, fmt='d')

In [ ]:
# Complete set, English answers and French answers, followed by the same
# three selections passed through getSurveysOfUsersWhoAnsweredBoth.
_languageSubsets = [
    gform,
    gform[gform[QLanguage] == enLanguageID],
    gform[gform[QLanguage] == frLanguageID],
]
samples = _languageSubsets + [getSurveysOfUsersWhoAnsweredBoth(subset) for subset in _languageSubsets]

# plotBasicStats is unpacked into eight binarized tables here.
# NOTE(review): another cell calls it with a title and keeps a single return
# value; confirm which signature the loaded function actually has.
for sample in samples:
    (
        sciBinarized, sciBinarizedBefore, sciBinarizedAfter, sciBinarizedUndefined,
        allBinarized, allBinarizedBefore, allBinarizedAfter, allBinarizedUndefined,
    ) = plotBasicStats(sample)

abandoned algorithms


In [ ]:
# Hand-written computation of the percentage of correct answers per question
# (column sums of the binarized answers divided by the number of rows),
# for all answers, the 'before' answers and the 'after' answers.
totalPerQuestionSci = np.dot(np.ones(sciBinarized.shape[0]), sciBinarized)
totalPerQuestionAll = np.dot(np.ones(allBinarized.shape[0]), allBinarized)
# the totals computed just above are reused instead of being recomputed
totalPerQuestionDFAll = pd.DataFrame(data=totalPerQuestionAll, index=allBinarized.columns)
percentagePerQuestionAll = totalPerQuestionDFAll*100 / allBinarized.shape[0]

#before
totalPerQuestionDFBefore = pd.DataFrame(
    data=np.dot(np.ones(allBinarizedBefore.shape[0]), allBinarizedBefore), index=allBinarizedBefore.columns
)
percentagePerQuestionBefore = totalPerQuestionDFBefore*100 / allBinarizedBefore.shape[0]

#after
totalPerQuestionDFAfter = pd.DataFrame(
    data=np.dot(np.ones(allBinarizedAfter.shape[0]), allBinarizedAfter), index=allBinarizedAfter.columns
)
percentagePerQuestionAfter = totalPerQuestionDFAfter*100 / allBinarizedAfter.shape[0]

# Three heatmaps side by side. The figure used to be built twice in a row by
# identical code; it is now drawn once.
_fig = plt.figure(figsize=(20,20))
ax1 = plt.subplot(131)
ax2 = plt.subplot(132)
ax3 = plt.subplot(133)
# the question labels are only shown on the leftmost heatmap
ax2.get_yaxis().set_visible(False)
ax3.get_yaxis().set_visible(False)
# only the last heatmap draws a colour bar
sns.heatmap(percentagePerQuestionAll.round().astype(int),ax=ax1,cmap=plt.cm.jet,square=True,annot=True,fmt='d', cbar=False)
sns.heatmap(percentagePerQuestionBefore.round().astype(int),ax=ax2,cmap=plt.cm.jet,square=True,annot=True,fmt='d', cbar=False)
sns.heatmap(percentagePerQuestionAfter.round().astype(int),ax=ax3,cmap=plt.cm.jet,square=True,annot=True,fmt='d', cbar=True)
ax1.set_title('percentage correct per question - all')
ax2.set_title('percentage correct per question - before')
ax3.set_title('percentage correct per question - after')
# Fine-tune figure; make subplots close to each other.
_fig.tight_layout()

In [ ]:
# Three heatmaps side by side: all, before and after.
_fig = plt.figure(figsize=(20,20))
ax1 = plt.subplot(131)
ax2 = plt.subplot(132)
ax3 = plt.subplot(133)

# the y axis is only kept on the leftmost panel
for hiddenAxis in (ax2, ax3):
    hiddenAxis.get_yaxis().set_visible(False)

# (axes, percentages, whether the colour bar is drawn)
panels = [
    (ax1, percentagePerQuestionAll, False),
    (ax2, percentagePerQuestionBefore, False),
    (ax3, percentagePerQuestionAfter, True),
]
for panelAxis, percentages, withColorBar in panels:
    sns.heatmap(
        percentages.round().astype(int),
        ax=panelAxis, cmap=plt.cm.jet, square=True, annot=True, fmt='d', cbar=withColorBar,
    )

ax1.set_title('percentage correct per question - all')
ax2.set_title('percentage correct per question - before')
ax3.set_title('percentage correct per question - after')
# Tighten the layout so that the panels sit close to each other.
_fig.tight_layout()

In [ ]:
# Same heatmap with three columns only: all, before, after.
percentageTables = [percentagePerQuestionAll, percentagePerQuestionBefore, percentagePerQuestionAfter]
percentagePerQuestionConcatenated = pd.concat(percentageTables, axis=1)

heatmapValues = percentagePerQuestionConcatenated.round().astype(int)
_fig = plt.figure(figsize=(20,20))
_ax1 = plt.subplot(111)
_ax1.set_title('percentage correct per question: all, before, after')
sns.heatmap(heatmapValues, ax=_ax1, cmap=plt.cm.jet, square=True, annot=True, fmt='d')

sample getters tinkering


In [ ]:
##### getRMAfters / getRMBefores tinkering
# Both selections rely on the temporality column of the sample.
temporality = sample[QTemporality]

#def getRMAfters(sample):
afters = sample[temporality == answerTemporalities[1]]

#def getRMBefores(sample):
befores = sample[temporality == answerTemporalities[0]]

set operators


In [ ]:
# equality tests
#(sample1.columns == sample2.columns).all()
#sample1.columns.duplicated().any() or sample2.columns.duplicated().any()
#pd.concat([sample1, sample2], axis=1).columns.duplicated().any()
getUnionQuestionnaires tinkering

In [ ]:
sample1, sample2 = befores, afters

#def getUnionQuestionnaires(sample1, sample2):
# Union = rows of both samples stacked, with exact duplicates removed.
if (sample1.columns != sample2.columns).any():
    print("warning: parameter columns are not the same")
stacked = pd.concat([sample1, sample2])
result = stacked.drop_duplicates()
getIntersectionQuestionnaires tinkering

In [ ]:
# Two overlapping slices of the same sample, used as test inputs.
sample1, sample2 = befores[:15], befores[10:]

#def getIntersectionQuestionnaires(sample1, sample2):
# Intersection = inner merge of both samples, with exact duplicates removed.
if (sample1.columns != sample2.columns).any():
    print("warning: parameter columns are not the same")
commonRows = sample1.merge(sample2, how = 'inner')
result = commonRows.drop_duplicates()
getIntersectionUsersSurveys tinkering

In [ ]:
sample1, sample2 = befores, afters

#def getIntersectionUsersSurveys(sample1, sample2):
# Keep, in each sample, the rows of the users who also appear in the other
# sample, then return the union of both selections.
userIds1 = sample1[localplayerguidkey]
userIds2 = sample2[localplayerguidkey]
result1 = sample1[userIds1.isin(userIds2)]
result2 = sample2[userIds2.isin(userIds1)]
result = getUnionQuestionnaires(result1,result2)

In [ ]:
# Sizes of both inputs and of `result`, displayed as a tuple.
len(sample1), len(sample2), len(result)
getGFormBefores tinkering

In [ ]:
sample = gform

# Selects the rows in which none of the four 'played' answers belongs to
# previousPlayPositives, i.e. users who declared that they have never played
# the game, whatever platform.
#  previousPlayPositives is defined in '../Static data/English localization.ipynb'
#def getGFormBefores(sample):
declaredPlay = (
      sample[QPlayed1].isin(previousPlayPositives)
    | sample[QPlayed2].isin(previousPlayPositives)
    | sample[QPlayed3].isin(previousPlayPositives)
    | sample[QPlayed4].isin(previousPlayPositives)
)
befores = sample[~declaredPlay]
len(befores)
getGFormAfters tinkering

In [ ]:
sample = gform

# Selects the rows in which at least one of the four 'played' answers belongs
# to previousPlayPositives, i.e. users who declared that they have already
# played the game, whatever platform.
#  previousPlayPositives is defined in '../Static data/English localization.ipynb'
#def getGFormAfters(sample):
declaredPlay = pd.Series(False, index=sample.index)
for playedQuestion in (QPlayed1, QPlayed2, QPlayed3, QPlayed4):
    declaredPlay = declaredPlay | sample[playedQuestion].isin(previousPlayPositives)
afters = sample[declaredPlay]
len(afters)
getGFormTemporality tinkering

In [ ]:
# Test inputs: the fourth id returned by getSurveysOfBiologists and the first
# survey row recorded for that id.
biologistIds = getSurveysOfBiologists(gform)[localplayerguidkey]
_GFUserId = biologistIds.iloc[3]
surveysOfUser = gform[gform[localplayerguidkey] == _GFUserId]
_gformRow = surveysOfUser.iloc[0]
sample = gform

In [ ]:
# Second temporality label; other cells of this notebook use it to select
# the 'after' answers.
answerTemporalities[1]

In [ ]:
#while result != answerTemporalities[1]:
_GFUserId = getRandomGFormGUID()
_gformRow = gform[gform[localplayerguidkey] == _GFUserId].iloc[0]

# returns an element of answerTemporalities
#  previousPlayPositives is defined in '../Static data/English localization.ipynb'
#def getGFormRowGFormTemporality(_gformRow):
result = answerTemporalities[2]
if (_gformRow[QPlayed1] in previousPlayPositives)\
    or (_gformRow[QPlayed2] in previousPlayPositives)\
    or (_gformRow[QPlayed3] in previousPlayPositives)\
    or (_gformRow[QPlayed4] in previousPlayPositives):
    result = answerTemporalities[1]
else:
    result = answerTemporalities[0]
result

getSurveysOfUsersWhoAnsweredBoth tinkering


In [ ]:
sample = gform
gfMode = True
rmMode = False

#def getSurveysOfUsersWhoAnsweredBoth(sample, gfMode = True, rmMode = False):
# Both selections start from the whole sample and are narrowed down by each
# enabled mode (GF getters and/or RM getters).
befores, afters = sample, sample

if gfMode:
    befores, afters = getGFormBefores(befores), getGFormAfters(afters)

if rmMode:
    befores, afters = getRMBefores(befores), getRMAfters(afters)

result = getIntersectionUsersSurveys(befores, afters)

# Summary, once with len and once with getUniqueUserCount:
# ((GF befores, RM befores, befores), (GF afters, RM afters, afters), result)
tuple(
    (
        (measure(getGFormBefores(sample)), measure(getRMBefores(sample)), measure(befores)),
        (measure(getGFormAfters(sample)), measure(getRMAfters(sample)), measure(afters)),
        measure(result),
    )
    for measure in (len, getUniqueUserCount)
)

In [ ]:
# Number of player ids in the selection returned with both modes enabled.
len(getSurveysOfUsersWhoAnsweredBoth(gform, gfMode = True, rmMode = True)[localplayerguidkey])

getSurveysThatAnswered tinkering


In [ ]:
sample = gform

#_GFUserId = getSurveysOfBiologists(gform)[localplayerguidkey].iloc[1]
#sample = gform[gform[localplayerguidkey] == _GFUserId]

hardPolicy = True
# List of [question, accepted answers] pairs. The closing bracket was missing,
# which made the whole cell a syntax error.
questionsAndPositiveAnswers = [[QStudiedBiology, biologyStudyPositives],
                               [QHeardSynBioOrBioBricks, heardAboutBioBricksPositives]]

#def getSurveysThatAnswered(sample, questionsAndPositiveAnswers, hardPolicy = True):
# hardPolicy: a row is kept when ALL questions received an accepted answer;
# otherwise a row is kept when AT LEAST ONE question did.
if hardPolicy:
    filterSeries = pd.Series(True, sample.index)
    for question, positiveAnswers in questionsAndPositiveAnswers:
        filterSeries = filterSeries & (sample[question].isin(positiveAnswers))
else:
    filterSeries = pd.Series(False, sample.index)
    for question, positiveAnswers in questionsAndPositiveAnswers:
        filterSeries = filterSeries | (sample[question].isin(positiveAnswers))
result = sample[filterSeries]

getSurveysOfBiologists tinkering


In [ ]:
sample = gform
hardPolicy = True

#def getSurveysOfBiologists(sample, hardPolicy = True):
# Questions used to select the surveys:
#QStudiedBiology biologyStudyPositives
#irrelevant QInterestBiology biologyInterestPositives
#QHeardSynBioOrBioBricks heardAboutBioBricksPositives

# List of [question, accepted answers] pairs. A trailing comma after the list
# used to wrap it in a 1-tuple, so the (question, answers) unpacking done by
# the getSurveysThatAnswered logic received the two pairs instead of one pair.
questionsAndPositiveAnswers = [[QStudiedBiology, biologyStudyPositives],
                               [QHeardSynBioOrBioBricks, heardAboutBioBricksPositives]]

result = getSurveysThatAnswered(sample, questionsAndPositiveAnswers, hardPolicy)
print(len(result) > 0)

In [ ]:
# Display the index of the survey table.
gform.index

In [ ]:
# Size of `result` (presumably the selection computed by the previous
# tinkering cell; depends on execution order).
len(result)

In [ ]:
# Check: the surveys of the first id returned by getSurveysOfBiologists must
# themselves yield a non-empty selection.
biologistIds = getSurveysOfBiologists(gform)[localplayerguidkey]
_GFUserId = biologistIds.iloc[0]
sample = gform[gform[localplayerguidkey] == _GFUserId]
biologistSurveys = getSurveysOfBiologists(sample)
len(biologistSurveys) > 0

getSurveysOfGamers tinkering


In [ ]:
sample, hardPolicy = gform, True

#def getSurveysOfGamers(sample, hardPolicy = True):
# Each pair is [question, accepted answers]:
#  - QInterestVideoGames is checked against interestPositives
#  - QPlayVideoGames is checked against frequencyPositives
questionsAndPositiveAnswers = [
    [QInterestVideoGames, interestPositives],
    [QPlayVideoGames, frequencyPositives],
]

result = getSurveysThatAnswered(sample, questionsAndPositiveAnswers, hardPolicy)

In [ ]:
# Size of `result` (presumably the gamers selection computed just above;
# depends on execution order).
len(result)

In [ ]:
# Inspect the type of `filterSeries` (assigned in the getSurveysThatAnswered
# tinkering cell).
type(filterSeries)

In [ ]:
# One boolean mask per 'played' question: True where the answer is positive.
playedMasks = [
    afters[playedQuestion].isin(previousPlayPositives)
    for playedQuestion in (QPlayed1, QPlayed2, QPlayed3, QPlayed4)
]
anyPlayed = playedMasks[0] | playedMasks[1] | playedMasks[2] | playedMasks[3]

# (rows with at least one positive answer, then rows per question)
(len(afters[anyPlayed]),) + tuple(len(afters[playedMask]) for playedMask in playedMasks)

getSurveysWithMatchingAnswers tinkering


In [ ]:
# Test inputs: the third id returned by getSurveysOfBiologists and the first
# survey row recorded for that id.
biologistIds = getSurveysOfBiologists(gform)[localplayerguidkey]
_GFUserId = biologistIds.iloc[2]
surveysOfUser = gform[gform[localplayerguidkey] == _GFUserId]
_gformRow = surveysOfUser.iloc[0]
sample = gform

In [ ]:
sample = gform
_gformRow = gform[gform[localplayerguidkey] == _GFUserId].iloc[0]
hardPolicy = False
# Available demographic questions:
#QAge
#QGender
#QInterestVideoGames
#QPlayVideoGames
#QStudiedBiology
#QInterestBiology
#QHeardSynBioOrBioBricks
#QLanguage
strictList = [QAge, QGender]
extendedList = [QInterestVideoGames, QPlayVideoGames, QStudiedBiology, QHeardSynBioOrBioBricks, QLanguage]



#def getSurveysWithMatchingAnswers(sample, _gformRow, strictList, extendedList = [], hardPolicy = False):
# Selects the surveys of `sample` whose answers to the chosen questions are
# the same as in _gformRow. The strict questions are always compared; the
# extended ones only when hardPolicy is set.
# The list is copied first: `questions = strictList` followed by `+=` would
# extend the caller's strictList in place.
questions = list(strictList)

if (hardPolicy):
    questions += extendedList

# The only accepted answer to each question is the one given in _gformRow.
questionsAndPositiveAnswers = [[q, [_gformRow[q]]] for q in questions]

getSurveysThatAnswered(sample, questionsAndPositiveAnswers, True)

getMatchingDemographics tinkering


In [ ]:
# Test inputs: the whole survey table and the first survey row of _GFUserId.
sample = gform
_gformRow = gform[gform[localplayerguidkey] == _GFUserId].iloc[0]
hardPolicy = True

#def getMatchingDemographics(sample, _gformRow, hardPolicy = False):
# age and gender
#QAge
#QGender

# interests, hobbies, and knowledge - evaluation may vary after playing
#QInterestVideoGames
#QPlayVideoGames
#QStudiedBiology
#QInterestBiology
#QHeardSynBioOrBioBricks

# language may vary: players may have missed the opportunity to set it, or may want to try and change it
#QLanguage

# Age and gender are passed as the strict list; the interest, knowledge and
# language questions are passed as the extended list.
# NOTE(review): how hardPolicy combines both lists is decided inside
# getSurveysWithMatchingAnswers - confirm against the loaded definition.
getSurveysWithMatchingAnswers(
    sample, 
    _gformRow, [QAge, QGender], 
    extendedList = [QInterestVideoGames, QPlayVideoGames, QStudiedBiology, QHeardSynBioOrBioBricks, QLanguage], 
    hardPolicy = hardPolicy
)

In [ ]:
# Display the current [question, accepted answers] pairs (value depends on
# which tinkering cell assigned it last).
questionsAndPositiveAnswers

getGFormRowCorrection tinkering


In [ ]:
_gformRow = gform[gform[localplayerguidkey] == _GFUserId].iloc[0]
_source = correctAnswers

#def getGFormRowCorrection( _gformRow, _source = correctAnswers):
# For every question of the row: NaN when the source lists no correct answer,
# True when the given answer starts with one of the listed answers,
# False otherwise.
if len(_gformRow) == 0:
    # nothing to correct: a copy of the empty row is the result
    result = _gformRow.copy()
    print("this gform row is empty")
else:
    result = pd.Series(index = _gformRow.index, data = np.full(len(_gformRow), np.nan))

    for question in result.index:
        acceptedAnswers = _source.loc[question]
        if len(acceptedAnswers) == 0:
            continue

        givenAnswer = str(_gformRow.loc[question])
        result.loc[question] = any(
            givenAnswer.startswith(str(acceptedAnswer)) for acceptedAnswer in acceptedAnswers
        )
result

getGFormRowScore tinkering


In [ ]:
_gformRow = gform[gform[localplayerguidkey] == _GFUserId].iloc[0]
_source = correctAnswers

#def getGFormRowScore( _gformRow, _source = correctAnswers):
# The score is the number of questions corrected as True (0 when none is).
correction = getGFormRowCorrection( _gformRow, _source = _source)
_counts = correction.value_counts()
_thisScore = _counts[True] if (True in _counts) else 0
_thisScore

getGFormDataPreview tinkering


In [ ]:
_GFUserId = getSurveysOfBiologists(gform)[localplayerguidkey].iloc[2]
sample = gform


# for per-gform, manual analysis
#def getGFormDataPreview(_GFUserId, sample):
# Builds one summary per survey answered by _GFUserId: date, both
# temporalities, score, gender and age, and the surveys with matching
# demographics.
# The surveys are looked up in `sample` (the parameter of the drafted
# function) rather than in the global gform, so the preview respects the
# sample it is given.
gforms = sample[sample[localplayerguidkey] == _GFUserId]
result = {}
for _ilocIndex in range(0, len(gforms)):
    currentGForm = gforms.iloc[_ilocIndex]

    subresult = {}
    subresult['date'] = currentGForm[QTimestamp]
    subresult['temporality RM'] = currentGForm[QTemporality]
    subresult['temporality GF'] = getGFormRowGFormTemporality(currentGForm)
    subresult['score'] = getGFormRowScore(currentGForm)
    subresult['genderAge'] = [currentGForm[QGender], currentGForm[QAge]]

    # search for other users with similar demographics;
    # each match is stored as [index in sample, player id]
    matchingDemographics = getMatchingDemographics(sample, currentGForm)
    subresult['demographic matches'] = [
        [matchesIndex, matchingDemographics.loc[matchesIndex, localplayerguidkey]]
        for matchesIndex in matchingDemographics.index
    ]

    result['survey' + str(_ilocIndex)] = subresult

print(result)

In [ ]:
# Print the index of every demographic match of the first survey
# (each match is an [index, player id] pair).
firstSurveyMatches = result['survey0']['demographic matches']
for matchIndex, _matchPlayerId in firstSurveyMatches:
    print(matchIndex)

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]: